import json
import mindspore
from cybertron import BertTokenizer, BertModel
from mindspore import context
import csv

# Run MindSpore in graph mode on the CPU — this is an offline preprocessing
# script, so no accelerator is assumed.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


def load_data(file_path):
    """Load question->answer pairs from a two-column CSV file.

    Args:
        file_path: Path to a CSV file whose first row is a header and whose
            remaining rows hold (question, answer) in columns 0 and 1.

    Returns:
        dict mapping question (str) to answer (str). If a question appears
        more than once, the last row wins.
    """
    result = {}
    # newline='' is required by the csv module for correct quoting/newline
    # handling; utf-8 matches the encoding used for the JSON outputs below.
    with open(file_path, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        # Skip the header row once, instead of testing line_num per row.
        next(reader, None)
        for row in reader:
            result[row[0]] = row[1]
    return result


def encode_resource_sentences(resource_sentences):
    """Encode each sentence with a MiniLM BERT model and persist the result.

    Args:
        resource_sentences: Iterable of sentence strings (the CSV questions).

    Side effects:
        Writes a {sentence: embedding-as-list} JSON file to both output
        locations (local data dir and the qaRobot API data dir).
    """
    tokenizer = BertTokenizer.load('sentence-transformers/all-MiniLM-L6-v2')
    model = BertModel.load('sentence-transformers/all-MiniLM-L6-v2')
    resource_sentences_encode = {}
    for sentence in resource_sentences:
        resource_token = mindspore.Tensor([tokenizer.encode(sentence, add_special_tokens=True)],
                                          mindspore.int32)
        # Index [1] is presumably the pooled sentence embedding and [0] the
        # single batch row — verify against cybertron's BertModel outputs.
        resource_sentences_encode[sentence] = model(resource_token)[1][0].asnumpy().tolist()
    # Write the identical payload to both consumers in one loop instead of
    # two copy-pasted dump blocks.
    for out_path in ("../data/resource_sentence_encode.json",
                     "../qaRobot/qa_api/data/resource_sentence_encode.json"):
        with open(out_path, "w", encoding='utf-8') as f:
            json.dump(resource_sentences_encode, f)


def dump_q_a(result):
    """Serialize the question->answer dict as JSON to both output locations.

    Args:
        result: dict mapping question (str) to answer (str), as produced by
            load_data().
    """
    # One loop over the two destinations instead of duplicated dump code.
    for out_path in ("../data/q_a.json",
                     "../qaRobot/qa_api/data/q_a.json"):
        with open(out_path, "w", encoding='utf-8') as f:
            json.dump(result, f)


if __name__ == '__main__':
    # Pipeline: CSV -> question/answer dict -> sentence embeddings -> JSON.
    print("loading structured QA data")
    qa_pairs = load_data("../../data/data.csv")
    print("encoding resource sentences")
    # The dict keys are the question sentences; only those get encoded.
    encode_resource_sentences(qa_pairs.keys())
    print("dump all the QA into json")
    dump_q_a(qa_pairs)
